* Positional argument 1: project root directory
global root_dir = "`1'"

* Run the project configuration. The globals log_dir, dataset_dir and code_dir used
* below are not defined in this file, so they are presumably set there (TODO confirm)
include "${root_dir}/code/config/config.do"

capture noisily log using ${log_dir}/make_indep_vars.log, replace name(dat)


* Positional arguments 2-5 are optional. A caller that wants to skip one passes the
* placeholder ___EMPTY___, which is mapped to the empty string here. A setting is only
* overwritten when its argument is non-empty; otherwise the value the global had
* before (if any) is left untouched.

* Argument 2 -> weight_category
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
if "${arg1}" != "" {
	global weight_category "${arg1}"
	display "Weight category: ${weight_category}"
}

* Argument 3 -> weight_versions
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
if "${arg2}" != "" {
	global weight_versions "${arg2}"
	display "Weight versions: ${weight_versions}"
}

* Argument 4 -> weight_window
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
if "${arg3}" != "" {
	global weight_window "${arg3}"
	display "Weight window: ${weight_window}"
}

* Argument 5 -> wtype; the value in effect is always echoed
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")
if "${arg4}" != "" {
	global wtype "${arg4}"
}
display "${wtype}"

capture noisily {
***********************************************************************************************************************
* indep_vars.do: Build wages and control variables
* Note: spillovers are built in spillovers.do and stocks come from dep_vars.do
***********************************************************************************************************************


************************************
* Preliminary stuff
************************************

* Only for weight type pauto95: take the dep-var firm-year list, keep the firms that
* also appear in the matching patent-weight file, and store the result as the
* firm-year list read by the sections below
if "${wtype}" == "pauto95" {
	use ${dataset_dir}/dep_vars/bvd_year_depvars_list.dta, clear
	mmerge BvD using ///
		${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}, ///
		ukeep(BvD) unmatched(none)
	drop _merge
	save ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, replace
}

************************************
* Load country list and labelling program
************************************
* The country-list globals and the labelingvars command used further down are not
* defined in this file; presumably these two do-files provide them (TODO confirm)
quietly do ${code_dir}/config/country_list.do
quietly do ${code_dir}/config/labeling_indepvars_function.do

* Sectors iterated over in the sector-specific sections
global sectors "MANUF TOTAL"


*****************************************
* Build indep vars with baseline weights
*****************************************

if "${weight_category}" == "" {

************************************
* Deviations from log GDP
************************************

* Start from the firm-year list
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Attach the wide log GDP gap file by year; only matched observations are kept
mmerge year using ${dataset_dir}/indep_vars/loggdp_gap_wide_TOTAL.dta, unmatched(none)

* Attach the firm-level patent weights (based on 10-years patent portfolio pre-1980, etc).
* Firms without a match stay in the data and are flagged in missing_weights_1995
mmerge BvD using ///
	${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, ///
	unmatched(master)
generate missing_weights_1995 = (_merge == 1)
drop _merge

* Bring in the GDP shares that serve as fallback weights
* NOTE(review): cross forms all pairwise combinations, so this presumably relies on
* the shares file holding a single observation - confirm
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
sort BvD year

foreach gap in lngdpgap {
	foreach cc of global countrylist1995 {

		* a country without this series gets an all-missing placeholder
		noisily capture confirm variable `gap'_`cc'
		if _rc {
			generate `gap'_`cc' = .
		}

		* country weight: patent share, replaced by the GDP share where it is missing
		generate weight_`cc' = share2_all_1995_`cc'
		replace weight_`cc' = share_GDP_`cc' if weight_`cc' == .

		* weighted contribution of this country
		generate `gap'_wtd_`cc' = weight_`cc' * `gap'_`cc'
	}

	* Total over countries; the missing option makes the total missing only when
	* every contribution is missing
	egen `gap'_ALL_1995_wtd = rowtotal(`gap'_wtd_??), missing

	* Home country = country with the largest weight
	egen maxshare = rowmax(weight_??)
	generate `gap'_shr_home_1995_wtd = .

	* If several countries tie for the largest weight, the one latest in the country
	* list overwrites the earlier ones. The original author notes having checked
	* (PS, July 2024) that this never seems to be the case.
	foreach cc of global countrylist1995 {
		replace `gap'_shr_home_1995_wtd = `gap'_wtd_`cc' if maxshare == weight_`cc'
	}

	* Foreign part = total minus home part; left missing when the home weight equals 1
	generate `gap'_shr_foreign_1995_wtd = `gap'_ALL_1995_wtd - `gap'_shr_home_1995_wtd if maxshare != 1

	* Version 2: normalize the home part by the home weight and the foreign part by
	* the remaining weight
	generate `gap'_shr2_home_1995_wtd = `gap'_shr_home_1995_wtd / maxshare
	generate `gap'_shr2_foreign_1995_wtd = `gap'_shr_foreign_1995_wtd / (1 - maxshare)

	* remove the helper variables
	drop maxshare weight_?? *`gap'_wtd_*
}

* Remove the raw shares and the country-level series
drop share*
drop lngdpgap_??

* Labelling
labelingvars "lngdpgap"

compress
save ${dataset_dir}/indep_vars/bvd_year_devgdp_sharesgdpweighted${weight_window}_${wtype}.dta, replace


************************************
* GDP per capita
************************************
* For every sector in the sectors global this builds, per deflator variant
* (DP, LP, MP, MG), the weighted total of GDP per capita across countries, its home
* and foreign parts and their normalized versions, and saves one file per sector.

* Lookup tables keyed by sector name: the a-entries give the one-letter tag used in
* variable names, the f-entries the tag used in the output file name
local aMANUF "m"
local aTOTAL "t"
local fMANUF "manuf"
local fTOTAL "totind"

foreach sector in $sectors {

	local m `a`sector''
	*load firms
	use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
	keep BvD year

	* Add GDP PER CAPITA (constant prices using different deflators (MP, LP, DP, MG), USD)
	mmerge year using ${dataset_dir}/indep_vars/gdp_percapita_wide_`sector'.dta, unmatched(none)

	* weights based on 10-years patent portfolio pre-1980, etc
	* firms without a match are kept and flagged in missing_weights_1995
	mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
	gen missing_weights_1995=(_m==1)
	drop _m
	*use gdp shares as weights
	cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
	sort BvD year

	* The country weights (patent share, GDP share where the patent share is missing)
	* and the largest weight per row do not depend on the deflator variant, so they
	* are built once per sector here rather than once per variable
	foreach ctry of global countrylist1995 {
		gen weight_`ctry' = share2_all_1995_`ctry'
		replace weight_`ctry' = share_GDP_`ctry' if weight_`ctry' == .
	}
	egen maxshare = rowmax(weight_??)

	foreach vv in gdppcDP gdppcLP gdppcMP gdppcMG {
		foreach ctry of global countrylist1995 {
			*check if the variable exists, create as empty variable if not
			noisily capture confirm variable `vv'_`ctry'
			if _rc != 0 {
				gen `vv'_`ctry' = .
			}

			*make weighted gdppc values and ln versions
			gen `vv'_wtd_`ctry' = weight_`ctry'* `vv'_`ctry'
			gen ln`vv'_wtd_`ctry' = weight_`ctry'* ln(`vv'_`ctry')
		}

		*collect the total weighted GDP per capita; the missing option makes the total
		*missing only when every contribution is missing
		egen `vv'`m'_ALL_1995_wtd = rowtotal(`vv'_wtd_??), missing
		egen ln`vv'`m'_ALL_1995_wtd = rowtotal(ln`vv'_wtd_??), missing

		* select home based on largest weight: the home share is the weighted value
		* of the country whose weight equals the row maximum
		gen `vv'`m'_shr_home_1995_wtd = .
		foreach ctry of global countrylist1995 {
			replace `vv'`m'_shr_home_1995_wtd = `vv'_wtd_`ctry' if maxshare==weight_`ctry'
		}
		*generate foreign share as remainder; missing when the home weight is 1
		gen `vv'`m'_shr_foreign_1995_wtd = `vv'`m'_ALL_1995_wtd - `vv'`m'_shr_home_1995_wtd
		replace `vv'`m'_shr_foreign_1995_wtd = . if maxshare == 1

		* Version 2: take home country value and average foreign country value (i.e. normalize)
		gen `vv'`m'_shr2_home_1995_wtd = `vv'`m'_shr_home_1995_wtd / maxshare
		gen `vv'`m'_shr2_foreign_1995_wtd = `vv'`m'_shr_foreign_1995_wtd / (1-maxshare)

		* remove the per-country weighted helpers of this variable
		drop *`vv'_wtd_*

	}

	*drop unneeded variables
	drop maxshare weight_??
	drop share*
	drop gdppc??_??

	*labelling
	labelingvars "gdppc"

	compress
	save ${dataset_dir}/indep_vars/bvd_year_gdpcap_`f`sector''_sharesgdpweighted${weight_window}_${wtype}.dta, replace
}


************************************
* Wages and VA per employee, by sector
************************************
* For every sector in the sectors global this builds weighted totals, home and foreign
* parts and normalized versions (levels and logs) of the wage and VA-per-employee
* series, and saves one file per sector.

* Lookup tables keyed by sector name: the a-entries give the one-letter tag used in
* variable names, the f-entries the tag used in the output file name
local aMANUF "m"
local aTOTAL "t"
local fMANUF "manuf"
local fTOTAL "totind"

foreach sector in $sectors {
	local m `a`sector''

	*import firms
	use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
	keep BvD year

	* Add wages (ls, hs and ms wage files)
	mmerge year using ${dataset_dir}/indep_vars/lswages_wide_`sector'.dta, unmatched(none)
	mmerge year using ${dataset_dir}/indep_vars/hswages_wide_`sector'.dta, unmatched(none)
	mmerge year using ${dataset_dir}/indep_vars/mswages_wide_`sector'.dta, unmatched(none)
	* note: no wage data in 2010 (kept for lagging dep var)

	* Add VA per employee
	mmerge year using ${dataset_dir}/indep_vars/vaemployee_wide_`sector'.dta, unmatched(none)

	* HQ Country: first two characters of the BvD id
	gen ctry=substr(BvD,1,2)

	* weights based on 10-years patent portfolio pre-1980, etc
	* firms without a match are kept and flagged in missing_weights_1995
	mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
	gen missing_weights_1995=(_m==1)
	drop _m

	*use gdp shares as weights
	cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
	sort BvD year

	* The country weights (patent share, GDP share where the patent share is missing)
	* and the largest weight per row are the same for every variable below, so they
	* are built once per sector here rather than once per variable
	foreach ctry of global countrylist1995 {
		gen weight_`ctry' = share2_all_1995_`ctry'
		replace weight_`ctry' = share_GDP_`ctry' if weight_`ctry' == .
	}
	egen maxshare = rowmax(weight_??)

	foreach vv in lswLP lswDP lswMP lswMG hswLP hswDP hswMP hswMG mswMP vaempLP vaempDP vaempMP vaempMG {

		foreach ctry of global countrylist1995 {
			*here we should have all variables, no need to create new ones for the sake of running
			*(confirm is not captured, so a missing variable stops the run)
			confirm variable `vv'_`ctry'

			*make weighted values and ln versions
			gen `vv'_wtd_`ctry' = weight_`ctry'* `vv'_`ctry'
			gen ln`vv'_wtd_`ctry' = weight_`ctry'* ln(`vv'_`ctry')
		}

		*collect the total weighted values; the missing option makes the total missing
		*only when every contribution is missing
		egen `vv'`m'_ALL_1995_wtd = rowtotal(`vv'_wtd_??), missing
		egen ln`vv'`m'_ALL_1995_wtd = rowtotal(ln`vv'_wtd_??), missing


		* select home based on largest weight
		gen ln`vv'`m'_shr_home_1995_wtd = .
		gen `vv'`m'_shr_home_1995_wtd = .

		*set the home share to the country with the largest weight
		foreach ctry of global countrylist1995 {
			replace ln`vv'`m'_shr_home_1995_wtd = ln`vv'_wtd_`ctry' if maxshare==weight_`ctry'
			replace `vv'`m'_shr_home_1995_wtd = `vv'_wtd_`ctry' if maxshare==weight_`ctry'
		}

		*generate the foreign share as the complement to the home share;
		*missing when the home weight is 1
		gen ln`vv'`m'_shr_foreign_1995_wtd = ln`vv'`m'_ALL_1995_wtd - ln`vv'`m'_shr_home_1995_wtd
		gen `vv'`m'_shr_foreign_1995_wtd = `vv'`m'_ALL_1995_wtd - `vv'`m'_shr_home_1995_wtd
		replace ln`vv'`m'_shr_foreign_1995_wtd = . if maxshare == 1
		replace `vv'`m'_shr_foreign_1995_wtd = . if maxshare == 1

		* Version 2: take home country wage and average foreign country wage (i.e. normalize)
		gen ln`vv'`m'_shr2_home_1995_wtd = ln`vv'`m'_shr_home_1995_wtd / maxshare
		gen ln`vv'`m'_shr2_foreign_1995_wtd = ln`vv'`m'_shr_foreign_1995_wtd / (1-maxshare)
		gen `vv'`m'_shr2_home_1995_wtd = `vv'`m'_shr_home_1995_wtd / maxshare
		gen `vv'`m'_shr2_foreign_1995_wtd = `vv'`m'_shr_foreign_1995_wtd / (1-maxshare)

		* remove the per-country weighted helpers of this variable
		drop *`vv'_wtd_*
	}

	*drop unneeded variables
	drop maxshare weight_??
	drop share*
	drop lsw??_?? msw??_?? hsw??_?? vaemp??_?? ctry

	*labelling
	labelingvars "lsw"
	labelingvars "hsw"
	labelingvars "msw"
	labelingvars "vaemp"

	compress
	save ${dataset_dir}/indep_vars/bvd_year_wages_vaemp_`f`sector''_sharesgdpweighted${weight_window}_${wtype}.dta, replace
}


************************************
* Minimum wage for manufacturing
************************************
*this section is very similar to the normal vaemp and wages section.
*But here we include minimum wages. We only compute those for a single deflator (MP = PPI 1995 Dollars) and sector (MANUF).
*The reason why this is separate is down to the countrylist being different for minimum wages, causing
*us to have to restrict the country list for the other variables too, which in turn changes the shares for them.
*Variables built here carry the suffix mtd instead of wtd.

use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year
* Add minimum wages
mmerge year using ${dataset_dir}/indep_vars/minwages_wide_MANUF.dta, unmatched(none)
* Country: first two characters of the BvD id
* NOTE(review): ctry is not dropped in this section, unlike in the wages and
* interest-rate sections - confirm that it is meant to stay in the saved file
gen ctry=substr(BvD,1,2)
* Add wages, GDP per capita, log GDP gap and VA per employee
mmerge year using ${dataset_dir}/indep_vars/lswages_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/hswages_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/gdp_percapita_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/loggdp_gap_wide_TOTAL.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/vaemployee_wide_MANUF.dta, unmatched(none)

* weights based on 10-years patent portfolio pre-1980, etc
*Theoretically we would have had to adjust the patent weights for the minimum wage here too, as we do for the longterm interest rates
*Why do we use the base versions? Two reasons: comparability to the baseline table 5, and we only noticed after the first submission to JPE that we had neglected to do this.
*So in the interest of consistency, it stays this way.
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
gen missing_weights_minw_1995=(_m==1)
drop _m
*GDP set for minimum wages; use gdp shares as weights
cross using ${dataset_dir}/weights/shares_GDP_allctries_forMINW_1995.dta
sort BvD year

* The country weights (patent share, GDP share where the patent share is missing) are
* the same for every variable below, so they are built once here rather than once
* per variable. Only the countries of the minimum-wage list get a weight.
foreach ctry of global countrylistMINW1995 {
	gen weight_`ctry' = share2_all_1995_`ctry'
	replace weight_`ctry' = share_GDP_`ctry' if weight_`ctry' == .
}

foreach vv in lngdpgap minwMP lswMP hswMP vaempMP gdppcMP {
	* the log GDP gap carries no sector tag in its variable names, all others get m
	if "`vv'" == "lngdpgap" {
		local m ""
	}
	else {
		local m "m"
	}

	foreach ctry of global countrylistMINW1995 {
		*make variables if they are missing
		noisily capture confirm variable `vv'_`ctry'
		if _rc != 0 {
			gen `vv'_`ctry' = .
		}
		*generate weighted values and ln versions
		* NOTE(review): for lngdpgap this takes the log of a log gap; the resulting
		* lnlngdpgap variables are kept here, while the interest-rate section drops
		* its lnln variables - confirm which is intended
		gen `vv'_mtd_`ctry' = weight_`ctry'* `vv'_`ctry'
		gen ln`vv'_mtd_`ctry' = weight_`ctry'* ln(`vv'_`ctry')
	}

	*generate total weighted values
	egen `vv'`m'_ALL_1995_mtd = rowtotal(`vv'_mtd_??), missing
	egen ln`vv'`m'_ALL_1995_mtd = rowtotal(ln`vv'_mtd_??), missing
	* Based on largest weight
	gen ln`vv'`m'_shr_home_1995_mtd = .
	gen `vv'`m'_shr_home_1995_mtd = .
	*computes the maximum share among minW country subset, not for all countries!

	egen maxshare = rowmax(weight_??)
	egen all_shrs`vv' = rowtotal(weight_??)
	gen maxweightctry_minw_`vv' = ""

	*the following loop should: 1. replace the first 2 variables with that of the country with the max share (the home country),
	*2. replace the third variable with the country code of the country with the max share, ie the home country (within the minwage dataset)
	*note that its still done across the standard weights, for reasoning see above
	foreach ctry of global countrylistMINW1995 {

		replace ln`vv'`m'_shr_home_1995_mtd = ln`vv'_mtd_`ctry' if maxshare==weight_`ctry'
		replace `vv'`m'_shr_home_1995_mtd = `vv'_mtd_`ctry' if maxshare==weight_`ctry'
		replace maxweightctry_minw_`vv'  = "`ctry'" if maxshare==weight_`ctry'
	}

	* make sure maximum share is 1 if only country in set,
	* i.e. when the largest weight equals the sum of all weights
	replace maxshare = 1 if maxshare == all_shrs`vv'
	* drop the helper by its full name instead of relying on variable abbreviation
	drop all_shrs`vv'

	*this will produce 0 foreign shares for countries that are now domestic.
	gen ln`vv'`m'_shr_foreign_1995_mtd = ln`vv'`m'_ALL_1995_mtd - ln`vv'`m'_shr_home_1995_mtd
	gen `vv'`m'_shr_foreign_1995_mtd = `vv'`m'_ALL_1995_mtd - `vv'`m'_shr_home_1995_mtd

	*this may cause observations to be dropped in cols 7-9 of the minwage table
	replace ln`vv'`m'_shr_foreign_1995_mtd = . if maxshare == 1
	replace `vv'`m'_shr_foreign_1995_mtd = . if maxshare == 1

	* Version 2: take home country wage and average foreign country wage (i.e. normalize)
	gen ln`vv'`m'_shr2_home_1995_mtd = ln`vv'`m'_shr_home_1995_mtd / maxshare
	gen ln`vv'`m'_shr2_foreign_1995_mtd = ln`vv'`m'_shr_foreign_1995_mtd / (1-maxshare)
	gen `vv'`m'_shr2_home_1995_mtd = `vv'`m'_shr_home_1995_mtd / maxshare
	gen `vv'`m'_shr2_foreign_1995_mtd = `vv'`m'_shr_foreign_1995_mtd / (1-maxshare)

	* remove the per-country weighted helpers and keep the home weight under a
	* variable-specific name
	drop *`vv'_mtd_*
	ren maxshare minwhctry_weight_`vv'

}
drop weight_??

* Only the home country and home weight derived in the minwMP pass are kept
ren maxweightctry_minw_minwMP minwhctry
drop vaemp??_?? lsw??_?? hsw??_?? gdppc??_?? lngdpgap_?? minwMP_??
drop minwhctry_weight_lngdpgap minwhctry_weight_lswMP minwhctry_weight_hswMP minwhctry_weight_vaempMP minwhctry_weight_gdppcMP
drop share*
drop  maxweightctry_minw_lngdpgap maxweightctry_minw_gdppcMP maxweightctry_minw_vaempMP maxweightctry_minw_hswMP maxweightctry_minw_lswMP

*labeling
labelingvars "minw"
labelingvars "lsw"
labelingvars "hsw"
labelingvars "vaemp"
labelingvars "gdppc"
labelingvars "lngdpgap"

compress
save ${dataset_dir}/indep_vars/bvd_year_minwage_manuf_sharesgdpweighted${weight_window}_${wtype}.dta, replace


************************************
* Long-term interest rates
************************************

*again a similar principle to the minwage section. the countrylist is not quite the same, so we have to restrict the countrylist for the other variables too.
*Also, again, we only do this for our main deflator and manufacturing sector
*This section only runs for the three weight windows listed in the condition below.

if "${weight_window}" == "_from1970" | "${weight_window}" == "_10yr" | "${weight_window}" == "_f1970t1989" {

*load firms
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Add wages, VA emp, GDP per capita, log GDP gap and interest rates
mmerge year using ${dataset_dir}/indep_vars/interestrates_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/lswages_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/hswages_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/gdp_percapita_wide_MANUF.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/loggdp_gap_wide_TOTAL.dta, unmatched(none)
mmerge year using ${dataset_dir}/indep_vars/vaemployee_wide_MANUF.dta, unmatched(none)

* HQ Country: first two characters of the BvD id
gen ctry=substr(BvD,1,2)

* weights based on 10-years patent portfolio pre-1980, etc, a specific set for the longterm interest rate
* firms without a match are kept and flagged in missing_weights_lintr_1995
mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr_forLintr${weight_window}_1995_orbis2017_${wtype}.dta, unmatched(master)
gen missing_weights_lintr_1995=(_m==1)
drop _m

*GDP set for Longterm interest rates; use gdp shares as weights
cross using ${dataset_dir}/weights/shares_GDP_allctries_forLintr_1995.dta
sort BvD year

* The country weights (patent share, GDP share where the patent share is missing) and
* the largest weight per row are the same for every variable below, so they are built
* once here rather than once per variable
foreach ctry of global countrylistLintr1995 {
	gen weight_`ctry' = share2_all_1995_`ctry'
	replace weight_`ctry' = share_GDP_`ctry' if weight_`ctry' == .
}
egen maxshare = rowmax(weight_??)

foreach vv in lngdpgap lintrMP lswMP hswMP vaempMP gdppcMP {

	* the log GDP gap carries no sector tag in its variable names, all others get m
	if "`vv'" == "lngdpgap" {
		local m ""
	}
	else {
		local m "m"
	}

	*make variables if they are missing and generate weighted values
	*these we denote not by wtd but by itd (interest rate patent weighted)
	foreach ctry of global countrylistLintr1995 {
		noisily capture confirm variable `vv'_`ctry'
		if _rc != 0 {
			gen `vv'_`ctry' = .
		}

		gen `vv'_itd_`ctry' = weight_`ctry'* `vv'_`ctry'
		gen ln`vv'_itd_`ctry' = weight_`ctry'* ln(`vv'_`ctry')
	}

	*generate total weighted values
	egen `vv'`m'_ALL_1995_itd = rowtotal(`vv'_itd_??), missing
	egen ln`vv'`m'_ALL_1995_itd = rowtotal(ln`vv'_itd_??), missing

	*Based on largest weight, select the homecountry and assign the home share
	gen ln`vv'`m'_shr_home_1995_itd = .
	gen `vv'`m'_shr_home_1995_itd = .
	foreach ctry of global countrylistLintr1995 {
		replace ln`vv'`m'_shr_home_1995_itd = ln`vv'_itd_`ctry' if maxshare==weight_`ctry'
		replace `vv'`m'_shr_home_1995_itd = `vv'_itd_`ctry' if maxshare==weight_`ctry'
	}

	*generate the foreign share as the complement to the home share;
	*missing when the home weight is 1
	gen ln`vv'`m'_shr_foreign_1995_itd = ln`vv'`m'_ALL_1995_itd - ln`vv'`m'_shr_home_1995_itd
	gen `vv'`m'_shr_foreign_1995_itd = `vv'`m'_ALL_1995_itd - `vv'`m'_shr_home_1995_itd
	replace ln`vv'`m'_shr_foreign_1995_itd = . if maxshare == 1
	replace `vv'`m'_shr_foreign_1995_itd = . if maxshare == 1

	* Version 2: take home country wage and average foreign country wage (i.e. normalize)
	gen ln`vv'`m'_shr2_home_1995_itd = ln`vv'`m'_shr_home_1995_itd / maxshare
	gen ln`vv'`m'_shr2_foreign_1995_itd = ln`vv'`m'_shr_foreign_1995_itd / (1-maxshare)
	gen `vv'`m'_shr2_home_1995_itd = `vv'`m'_shr_home_1995_itd / maxshare
	gen `vv'`m'_shr2_foreign_1995_itd = `vv'`m'_shr_foreign_1995_itd / (1-maxshare)

	* remove the per-country weighted helpers of this variable
	drop *`vv'_itd_*
}
drop maxshare weight_??

*save the home country, defined by the largest patent share alone (no GDP fallback)
* NOTE(review): for firms without a weights match every share2 variable and therefore
* the row maximum is missing; Stata evaluates missing == missing as true, so these
* rows presumably end up with the last country of the list instead of an empty
* string - confirm this is intended
egen maxweight_lintr_1995 = rowmax(share2_*)
gen country_shr_lintr_1995 = ""

foreach ctry of global countrylistLintr1995 {
	replace country_shr_lintr_1995 = "`ctry'" if maxweight_lintr_1995==share2_all_1995_`ctry'
}

* drop raw shares, country-level series, ctry and the log-of-log-gap variables
drop share*
drop lintr??_?? ?sw??_?? vaemp??_?? gdppc??_?? lngdpgap_?? ctry lnln*

*labelling
labelingvars "lngdpgap"
labelingvars "vaemp"
labelingvars "lsw"
labelingvars "hsw"
labelingvars "gdppc"
labelingvars "lintr"


compress
save ${dataset_dir}/indep_vars/bvd_year_lintr_manuf_sharesgdpweighted${weight_window}_${wtype}.dta, replace

}

*************************************************
* Firm country and multinational/domestic indicator
*************************************************
* Stores, per firm-year, the home country implied by the patent weights and the size
* of that largest weight. No separate multinational dummy is created in this section.

* Firm-year list
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Attach the patent weights; firms without a match are kept
mmerge BvD using ///
	${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, ///
	unmatched(master)
* (this flag is not part of the keep list below, so it does not reach the saved file)
generate missing_weights_1995 = (_merge == 1)

* Home country = country with the largest patent share; on ties the country latest
* in the country list wins
* NOTE(review): for firms without a weights match every share2 variable and therefore
* the row maximum is missing; Stata evaluates missing == missing as true, so these
* rows presumably end up with the last country of the list instead of an empty
* string - confirm this is intended
egen maxweight_1995 = rowmax(share2_all_1995_*)
generate country_shr_1995 = ""
foreach cc of global countrylist1995 {
	replace country_shr_1995 = "`cc'" if maxweight_1995 == share2_all_1995_`cc'
}
drop share2*

keep BvD year country* maxweight_*

* Labelling
label variable country_shr_1995 "Home country by patent weight"
label variable maxweight_1995 "Weight of the home country def. by patents"

compress
save ${dataset_dir}/indep_vars/bvd_year_country_multinational${weight_window}_${wtype}.dta, replace

************************************
* Share for manufacturing
************************************

* Firm-year list
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Manufacturing share series, matched on year; only matched observations are kept
mmerge year using ${dataset_dir}/indep_vars/manufshare_wide_MANUF.dta, unmatched(none)

* Patent weights (based on 10-years patent portfolio pre-1980, etc); firms without a
* match are kept and flagged in missing_weights_1995
mmerge BvD using ///
	${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, ///
	unmatched(master)
generate missing_weights_1995 = (_merge == 1)
drop _merge

* GDP shares, used as fallback weights
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
sort BvD year

foreach v in mshare {

	foreach cc of global countrylist1995 {
		* a country without this series gets an all-missing placeholder
		noisily capture confirm variable `v'_`cc'
		if _rc {
			generate `v'_`cc' = .
		}
		* country weight: patent share, replaced by the GDP share where it is missing
		generate weight_`cc' = share2_all_1995_`cc'
		replace weight_`cc' = share_GDP_`cc' if weight_`cc' == .
		* weighted contribution of this country
		generate `v'_wtd_`cc' = weight_`cc' * `v'_`cc'
	}

	* Total over countries; the missing option makes the total missing only when
	* every contribution is missing
	egen `v'm_ALL_1995_wtd = rowtotal(`v'_wtd_??), missing

	* Home country = country with the largest weight
	egen maxshare = rowmax(weight_??)
	generate `v'm_shr_home_1995_wtd = .
	foreach cc of global countrylist1995 {
		replace `v'm_shr_home_1995_wtd = `v'_wtd_`cc' if maxshare == weight_`cc'
	}

	* Foreign part = total minus home part; left missing when the home weight equals 1
	generate `v'm_shr_foreign_1995_wtd = `v'm_ALL_1995_wtd - `v'm_shr_home_1995_wtd if maxshare != 1

	* Version 2: normalize the home part by the home weight and the foreign part by
	* the remaining weight
	generate `v'm_shr2_home_1995_wtd = `v'm_shr_home_1995_wtd / maxshare
	generate `v'm_shr2_foreign_1995_wtd = `v'm_shr_foreign_1995_wtd / (1 - maxshare)

	* remove the helper variables
	drop maxshare weight_?? *`v'_wtd_*
}

* Remove the raw shares and the country-level series
drop share*
drop mshare_??

* Labelling
labelingvars "mshare"

compress
save ${dataset_dir}/indep_vars/bvd_year_manufshare_sharesgdpweighted${weight_window}_${wtype}.dta, replace

************************************
* Manufacturing Value added absolute
************************************

* Firm-year list
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* Manufacturing VA series, matched on year; only matched observations are kept
mmerge year using ${dataset_dir}/indep_vars/manufVA_wide_MANUF.dta, unmatched(none)

* Patent weights (based on 10-years patent portfolio pre-1980, etc); firms without a
* match are kept and flagged in missing_weights_1995
mmerge BvD using ///
	${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, ///
	unmatched(master)
generate missing_weights_1995 = (_merge == 1)
drop _merge

* GDP shares, used as fallback weights
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
sort BvD year

foreach v in mVA {
	foreach cc of global countrylist1995 {
		* a country without this series gets an all-missing placeholder
		noisily capture confirm variable `v'_`cc'
		if _rc {
			generate `v'_`cc' = .
		}
		* country weight: patent share, replaced by the GDP share where it is missing
		generate weight_`cc' = share2_all_1995_`cc'
		replace weight_`cc' = share_GDP_`cc' if weight_`cc' == .
		* weighted contribution of this country, in levels and in logs
		generate `v'_wtd_`cc' = weight_`cc' * `v'_`cc'
		generate ln`v'_wtd_`cc' = weight_`cc' * ln(`v'_`cc')
	}

	* Totals over countries; the missing option makes a total missing only when
	* every contribution is missing
	egen `v'm_ALL_1995_wtd = rowtotal(`v'_wtd_??), missing
	egen ln`v'm_ALL_1995_wtd = rowtotal(ln`v'_wtd_??), missing

	* Home country = country with the largest weight; fill the log and level home parts
	egen maxshare = rowmax(weight_??)
	generate ln`v'm_shr_home_1995_wtd = .
	generate `v'm_shr_home_1995_wtd = .
	foreach cc of global countrylist1995 {
		replace ln`v'm_shr_home_1995_wtd = ln`v'_wtd_`cc' if maxshare == weight_`cc'
		replace `v'm_shr_home_1995_wtd = `v'_wtd_`cc' if maxshare == weight_`cc'
	}

	* Foreign part = total minus home part; left missing when the home weight equals 1
	generate ln`v'm_shr_foreign_1995_wtd = ln`v'm_ALL_1995_wtd - ln`v'm_shr_home_1995_wtd if maxshare != 1
	generate `v'm_shr_foreign_1995_wtd = `v'm_ALL_1995_wtd - `v'm_shr_home_1995_wtd if maxshare != 1

	* Version 2: normalize the home part by the home weight and the foreign part by
	* the remaining weight
	generate ln`v'm_shr2_home_1995_wtd = ln`v'm_shr_home_1995_wtd / maxshare
	generate ln`v'm_shr2_foreign_1995_wtd = ln`v'm_shr_foreign_1995_wtd / (1 - maxshare)
	generate `v'm_shr2_home_1995_wtd = `v'm_shr_home_1995_wtd / maxshare
	generate `v'm_shr2_foreign_1995_wtd = `v'm_shr_foreign_1995_wtd / (1 - maxshare)

	* remove the helper variables
	drop maxshare weight_?? *`v'_wtd_*
}

* Remove the raw shares and the country-level series
drop share*
drop mVA_??

* Labelling
labelingvars "mVA"

compress
save ${dataset_dir}/indep_vars/bvd_year_manufVA_sharesgdpweighted${weight_window}_${wtype}.dta, replace


**********************************************
* Within-Manuf low-skill weighted VA
**********************************************
* Same construction as the manufacturing VA section, but the output variables use the
* shorter tags h and fg in place of home and foreign.

* Firm-year list
use ${dataset_dir}/dep_vars/bvd_year_list${weight_window}_${wtype}.dta, clear
keep BvD year

* mVAls series, matched on year; only matched observations are kept
mmerge year using ${dataset_dir}/indep_vars/mVAls_wide_MANUF.dta, unmatched(none)

* Patent weights (based on 10-years patent portfolio pre-1980, etc); firms without a
* match are kept and flagged in missing_weights_1995
mmerge BvD using ///
	${dataset_dir}/weights/bvdid_pat_weights_EPtr${weight_window}_1995_orbis2017_${wtype}.dta, ///
	unmatched(master)
generate missing_weights_1995 = (_merge == 1)
drop _merge

* GDP shares, used as weights where no patent weight is available
cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta
sort BvD year

foreach v in mVAls {
	foreach cc of global countrylist1995 {
		* a country without this series gets an all-missing placeholder
		noisily capture confirm variable `v'_`cc'
		if _rc {
			generate `v'_`cc' = .
		}

		* country weight: patent share, replaced by the GDP share where it is missing
		generate weight_`cc' = share2_all_1995_`cc'
		replace weight_`cc' = share_GDP_`cc' if weight_`cc' == .

		* weighted contribution of this country, in levels and in logs
		generate `v'_wtd_`cc' = weight_`cc' * `v'_`cc'
		generate ln`v'_wtd_`cc' = weight_`cc' * ln(`v'_`cc')
	}

	* Totals over countries; the missing option makes a total missing only when
	* every contribution is missing
	egen `v'm_ALL_1995_wtd = rowtotal(`v'_wtd_??), missing
	egen ln`v'm_ALL_1995_wtd = rowtotal(ln`v'_wtd_??), missing

	* Home country = country with the largest weight; fill the log and level home parts
	egen maxshare = rowmax(weight_??)
	generate ln`v'm_shr_h_1995_wtd = .
	generate `v'm_shr_h_1995_wtd = .
	foreach cc of global countrylist1995 {
		replace ln`v'm_shr_h_1995_wtd = ln`v'_wtd_`cc' if maxshare == weight_`cc'
		replace `v'm_shr_h_1995_wtd = `v'_wtd_`cc' if maxshare == weight_`cc'
	}

	* Foreign part = total minus home part; left missing when the home weight equals 1
	generate ln`v'm_shr_fg_1995_wtd = ln`v'm_ALL_1995_wtd - ln`v'm_shr_h_1995_wtd if maxshare != 1
	generate `v'm_shr_fg_1995_wtd = `v'm_ALL_1995_wtd - `v'm_shr_h_1995_wtd if maxshare != 1

	* Version 2: normalize the home part by the home weight and the foreign part by
	* the remaining weight
	generate ln`v'm_shr2_h_1995_wtd = ln`v'm_shr_h_1995_wtd / maxshare
	generate ln`v'm_shr2_fg_1995_wtd = ln`v'm_shr_fg_1995_wtd / (1 - maxshare)
	generate `v'm_shr2_h_1995_wtd = `v'm_shr_h_1995_wtd / maxshare
	generate `v'm_shr2_fg_1995_wtd = `v'm_shr_fg_1995_wtd / (1 - maxshare)

	* remove the helper variables
	drop maxshare weight_?? *`v'_wtd_*
}

* Remove the raw shares and the country-level series
drop share*
drop mVAls_??

* Labelling
labelingvars "mVAls"


compress
save ${dataset_dir}/indep_vars/bvd_year_mVAls95_sharesgdpweighted${weight_window}_${wtype}.dta, replace

}
* end of the baseline weight category branch (weight_category empty)

***********************************************************************************************************************
* Build indendent variables (wages and controls) with alternative weights
***********************************************************************************************************************

if "${weight_category}" != "" {

* Start from the (complete) BvD-year list of firms
use ${dataset_dir}/dep_vars/bvd_year_list_from1970_${wtype}.dta, clear

* Attach the wide MANUF series by year, in this order: GDP per capita (constant prices, USD),
* log GDP gap, low-skill wages, high-skill wages and value added per employee.
* The two lists are parallel: word k of the first is the file stem, word k of the second the variable stem.
local macro_files gdp_percapita loggdp_gap lswages hswages vaemployee
local macro_stubs gdppcMP lngdpgap lswMP hswMP vaempMP
forvalues k = 1/5 {
	local src : word `k' of `macro_files'
	local stub : word `k' of `macro_stubs'
	mmerge year using ${dataset_dir}/indep_vars/`src'_wide_MANUF.dta, unmatched(none) ukeep(year `stub'_??)
}
drop _merge
sort BvD year

* Store the combined file so that the weight loop below can reload it without repeating the merges
save ${dataset_dir}/indep_vars/bvd_year_combined_MANUF.dta, replace

***********************************************************************************************************************
* Load macros: country list, weight categories and subtypes
***********************************************************************************************************************


* Copy the global settings into locals, then derive a short tag (first four characters of the
* weight category) that is embedded in the generated variable names
local weight_versions "${weight_versions}"
local weight_category "${weight_category}"
local wc = substr("`weight_category'", 1, 4)

***********************************************************************************************************************
* Create independent variables: Multiply vars with different weight subtypes
***********************************************************************************************************************

foreach wt of local weight_versions { 

	* Reload the combined firm-year file and attach the weights of the current version.
	* unmatched(master) keeps firms that have no record in the weights file.
	use ${dataset_dir}/indep_vars/bvd_year_combined_MANUF.dta, clear
	gen missing_weights_`wc'`wt' = 0
	mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr_`weight_category'_`wt'_orbis2017_${wtype}.dta, unmatched(master)

	* Flag the master-only rows, i.e. firms for which this weight version is missing
	replace missing_weights_`wc'`wt' = 1 if _merge == 1
	drop _merge

	* Add the 1995 GDP shares to every row; they serve as fallback weights when no patent weight exists
	cross using ${dataset_dir}/weights/shares_GDP_allctries_1995.dta

	* For the weight category "excluding" the fallback GDP share is rescaled by one minus the GDP share
	* of the excluded country, to reflect that one country is missing. Other categories use the raw share.
	local gdp_rescale ""
	if "`weight_category'" ==  "excluding" {
		local gdp_rescale "/(1-share_GDP_`wt')"
	}

	foreach ctry of global countrylist_all {
		* Patent-based weight first, GDP-based fallback where that weight is missing
		gen weight_`wc'`wt'_`ctry' = share2_all_`wc'`wt'_`ctry'
		replace weight_`wc'`wt'_`ctry' = share_GDP_`ctry'`gdp_rescale' if weight_`wc'`wt'_`ctry' == .
	}
	drop share2_all_`wc'`wt'_*

	* Attach the pre-sample baseline weights (used for the home/foreign definition in some weights)
	* and flag firms that have none. The merge indicator is left in place here and dropped before saving.
	gen missing_weights = 0
	mmerge BvD using ${dataset_dir}/weights/bvdid_pat_weights_EPtr_from1970_1995_orbis2017_${wtype}.dta, unmatched(master)
	replace missing_weights = 1 if _merge == 1

	* Baseline weight per country: patent weight where available, GDP share otherwise
	foreach ctry of global countrylist_all {
		gen weight_1995_`ctry' = share2_all_1995_`ctry'
		replace weight_1995_`ctry' = share_GDP_`ctry' if weight_1995_`ctry' == .
	}
	drop share2_all_1995_* share_GDP_*

	* Weight each country series with the version-specific weights, in levels and in logs (main deflator only)
	foreach vv in lswMP hswMP vaempMP gdppcMP lngdpgap {
		* No ln variant is built for lngdpgap (its name indicates it is already a log measure)
		local add_ln = ("`vv'" != "lngdpgap")

		foreach ctry of global countrylist_all {
			* A country without this series gets an all-missing placeholder so that the products can be formed
			noisily capture confirm variable `vv'_`ctry'
			if _rc {
				gen `vv'_`ctry' = .
			}
			gen `vv'_wtd_`wc'`wt'_`ctry' = weight_`wc'`wt'_`ctry'* `vv'_`ctry'
			if `add_ln' {
				gen ln`vv'_wtd_`wc'`wt'_`ctry' = weight_`wc'`wt'_`ctry'* ln(`vv'_`ctry')
			}
		}

		* Row totals over all countries; with the missing option the total is missing only if all parts are missing
		egen `vv'_ALL_`wc'`wt'_wtd = rowtotal(`vv'_wtd_`wc'`wt'_??), missing
		if `add_ln' {
			egen ln`vv'_ALL_`wc'`wt'_wtd = rowtotal(ln`vv'_wtd_`wc'`wt'_??), missing
		}

		* The unweighted country series are no longer needed
		drop `vv'_??
	}

	* Home/foreign split for the "excluding" category. The home country is identified from the
	* baseline 1995 weights (as usual, for comparison); values come from the version-specific weights.
	if "`weight_category'" ==  "excluding" {
		gen home = ""
		egen maxshare_1995 = rowmax(weight_1995_??)
		egen maxshare_`wc'`wt' = rowmax(weight_`wc'`wt'_??)

		* Home is the country holding the largest baseline weight; the version-specific
		* max share is overwritten with the weight of that same country
		foreach ctry of global countrylist_all {
			local is_home "maxshare_1995 == weight_1995_`ctry'"
			replace home = "`ctry'" if  `is_home'
			replace maxshare_`wc'`wt' = weight_`wc'`wt'_`ctry' if `is_home'
		}

		foreach vv in lswMP hswMP vaempMP gdppcMP lngdpgap {
			* lngdpgap has no ln variant
			local add_ln = ("`vv'" != "lngdpgap")

			* Home share: the weighted value of the home country
			if `add_ln' {
				gen ln`vv'_shr_h_`wc'`wt'_wtd = .
			}
			gen `vv'_shr_h_`wc'`wt'_wtd = .
			foreach ctry of global countrylist_all {
				local is_home "maxshare_1995 == weight_1995_`ctry'"
				if `add_ln' {
					replace ln`vv'_shr_h_`wc'`wt'_wtd = ln`vv'_wtd_`wc'`wt'_`ctry' if `is_home'
				}
				replace `vv'_shr_h_`wc'`wt'_wtd = `vv'_wtd_`wc'`wt'_`ctry' if `is_home'
			}

			* Foreign share: total minus home; left missing when the baseline home weight equals one
			if `add_ln' {
				gen ln`vv'_shr_fg_`wc'`wt'_wtd = ln`vv'_ALL_`wc'`wt'_wtd - ln`vv'_shr_h_`wc'`wt'_wtd if maxshare_1995 != 1
			}
			gen `vv'_shr_fg_`wc'`wt'_wtd = `vv'_ALL_`wc'`wt'_wtd - `vv'_shr_h_`wc'`wt'_wtd if maxshare_1995 != 1
		}
	}



* Define home and foreign variables - based on the "changed/new" (alternative period) weights
if "`weight_category'" ==  "altperiod" {
	* The home country is the one with the largest alternative-period weight, and its
	* alternative-period weight is used as the home weight.
	gen home = ""
	egen maxshare_`wc'`wt' = rowmax(weight_`wc'`wt'_??)
	foreach ctry of global countrylist_all {
		replace home = "`ctry'" if  maxshare_`wc'`wt' == weight_`wc'`wt'_`ctry'
	}

	foreach vv in lswMP hswMP vaempMP gdppcMP lngdpgap {
		* Home shares start as missing; lngdpgap has no ln variant.
		if "`vv'" != "lngdpgap" {
			gen ln`vv'_shr_h_`wc'`wt'_wtd = .
		}
		* The name must carry the category tag plus version, because the foreign-share and
		* normalisation steps refer to the variable under exactly this name.
		gen `vv'_shr_h_`wc'`wt'_wtd = .

		* Set the home shares: weighted value of the country with the largest weight.
		* The weighted inputs were created with the category tag plus version in their names.
		foreach ctry of global countrylist_all {
			if "`vv'" != "lngdpgap" {
				replace ln`vv'_shr_h_`wc'`wt'_wtd = ln`vv'_wtd_`wc'`wt'_`ctry' if maxshare_`wc'`wt' == weight_`wc'`wt'_`ctry'
			}
			replace `vv'_shr_h_`wc'`wt'_wtd = `vv'_wtd_`wc'`wt'_`ctry' if maxshare_`wc'`wt' == weight_`wc'`wt'_`ctry'
		}

		* Set the foreign shares: total minus home, missing when the home weight equals one
		if "`vv'" != "lngdpgap" {
			gen ln`vv'_shr_fg_`wc'`wt'_wtd = ln`vv'_ALL_`wc'`wt'_wtd - ln`vv'_shr_h_`wc'`wt'_wtd
			replace ln`vv'_shr_fg_`wc'`wt'_wtd = . if maxshare_`wc'`wt'== 1
		}
		gen `vv'_shr_fg_`wc'`wt'_wtd = `vv'_ALL_`wc'`wt'_wtd - `vv'_shr_h_`wc'`wt'_wtd
		replace `vv'_shr_fg_`wc'`wt'_wtd = . if maxshare_`wc'`wt' == 1
	}
}

* Normalised variants: home share divided by the home weight, foreign share divided by one minus the home weight
	foreach vv in lswMP hswMP vaempMP gdppcMP lngdpgap {
		* Stems to process, ln variant first; lngdpgap only exists without the ln prefix
		local stems ln`vv' `vv'
		if "`vv'" == "lngdpgap" {
			local stems `vv'
		}
		foreach stem of local stems {
			gen `stem'_shr2_h_`wc'`wt'_wtd = `stem'_shr_h_`wc'`wt'_wtd / maxshare_`wc'`wt'
			gen `stem'_shr2_fg_`wc'`wt'_wtd = `stem'_shr_fg_`wc'`wt'_wtd / (1-maxshare_`wc'`wt')
		}
	}

* Keep only the relevant variables, label them and save.
* Note: the alternatively weighted indep vars use "h" for "home" and "fg" for "foreign",
* because the longer names conflict with Stata's variable name character limit.

	* Remove all per-country weighted intermediates (any stem), all weights and the merge indicator
	drop *_wtd_`wc'`wt'_?? weight_* _merge

	* Label each variable family with the labelingvars program
	foreach stub in lsw hsw vaemp gdppc lngdpgap {
		labelingvars "`stub'"
	}

	compress
	save ${dataset_dir}/indep_vars/bvd_year_indepvars_sharesgdpweighted_`weight_category'_`wt'_${wtype}.dta, replace
}
}

}
* Report the outcome of the captured block above: _rc holds its return code, zero meaning no error
display cond(_rc == 0, "Execution finished successfully.", "Execution finished with errors.")

* Close the log named dat; the capture prefix suppresses the error if that log is not open
capture log close dat